* Session setup: close any log left open, start a fresh text log, pin the
* Stata version, and reset data, programs and macros.
capture log close
log 	using 20250822-brfss2015-dataprep, replace text
version 15.1
clear 	all
macro 	drop _all
set 	linesize 80
* NOTE: the plottig scheme is deliberately NOT set here. It is a user-written
* scheme, so -set scheme plottig- would abort the do-file on a machine where
* it has not been installed yet. The scheme is set right after the
* installed-packages check further down.
set		more off
set		varabbrev off
set 	rmsg on

// #0. Document this do-file
	* Today's date as YYYY/MM/DD, split on "/" into year, month and day
	* (tokens 2 and 4 are the "/" separators themselves).
	local today : display %tdCY/N/D daily("$S_DATE", "DMY")
	tokenize "`today'", parse("/")
	local year  `1'
	local month `3'
	local day   `5'
	local date  "`year'-`month'-`day'"

	* Provenance tag (program, author, run date); written into the notes of
	* every variable created below.
	local pgm "20250822-brfss2015-dataprep"
	local who "Ben Cheung"
	local tag "`pgm'.do by `who' on `date'"
	display "The tag is: `tag'"


	* Redefine today as a compact YYYYMMDD stamp
	* (NOTE(review): not used in the visible part of the file - confirm it is needed).
	local today : display %tdCYND daily("$S_DATE", "DMY")


	*Check for installed packages; install any that -which- cannot find
	* The plottig scheme is distributed in the SSC package -blindschemes-
	* (the package name was previously misspelled, so the install failed).
	capture which scheme-plottig.scheme
	if _rc!=0 ssc install blindschemes

	capture which confirmdir
	if _rc!=0 ssc install confirmdir

	capture which estout
	if _rc!=0 ssc install estout

	capture which sum2docx
	if _rc!=0 ssc install sum2docx

	* tab2xl is not on SSC; it is installed from its author's page
	capture which tab2xl
	if _rc!=0 net install http://www.stata.com/users/kcrow/tab2xl, replace

	* Safe to select the scheme now that its package is known to be installed
	set scheme plottig


* Make sure the project folders exist: create a folder when confirmdir
* reports code 170 for it (the code this script treats as "not found").

* Raw data folder. The name must match the path used by -import sasxport-
* below ("Original Data"); on a case-sensitive file system "Original data"
* would be a different, empty folder.
local folder "Original Data"
confirmdir "`folder'"
if `r(confirmdir)'==170 {
	mkdir "`folder'"
}



* Output folder for the analysis-ready data
local folder "Data Ready for Analysis"
confirmdir "`folder'"
if `r(confirmdir)'==170 {
	mkdir "`folder'"
}




*pe ("print and execute"): echo a command the way Stata shows typed input,
*then run it. Useful inside loops, where commands are otherwise not echoed.
*Usage: pe <any Stata command>. Called with nothing, it does nothing.
program define pe
	version 15.1

	* Nothing to echo or run
	if `"`0'"' == "" {
		exit
	}

	display ""
	display as input `". `0'"'
	`0'
	display ""
end





// #1. Import data
* Read the BRFSS 2015 SAS XPORT file into memory, replacing any data in memory
import sasxport "Original Data/LLCP2015.XPT", clear




// #2. Data management
*Survey setup in Stata: https://www.cdc.gov/copd/pdfs/BRFSS_COPD_Syntax.pdf

*calculated variables for race vary across years:
*_racegr (1993-2000): 1=Non-hispanic white, 2=Non-hispanic black, 3=Hispanic, 4=Other, 9=missing (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*_racegr2 (2001-2012): 1=Non-hispanic white, 2=Non-hispanic black, 3=Non-hispanic other, 4=Non-hispanic multiracial, 5=Hispanic, 9=missing (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*_racegr3 (2013-2019): 1=Non-hispanic white, 2=Non-hispanic black, 3=Non-hispanic other, 4=Non-hispanic multiracial, 5=Hispanic, 9=missing (https://www.cdc.gov/brfss/annual_data/2019/pdf/codebook19_llcp-v2-508.HTML)


*race (1993-2000): 1=Non-hispanic white, 2=Non-hispanic black, 3=Hispanic-white, 4=Hispanic-black, 5=Other hispanic, 6=Non-hispanic asian/Pacific Islander, 7=Non-hispanic Native American, 8=Non-hispanic other, 9=missing (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*race2 (2001-2012): 1=Non-hispanic white, 2=Non-hispanic black, 3=Non-hispanic asian, 4=Non-hispanic Native Hawaiian/Pacific Islander, 5=Non-hispanic Native American, 6=Non-hispanic other, 7=Non-hispanic multiracial, 8=Hispanic, 9=missing (https://www.cdc.gov/brfss/annual_data/2001/pdf/riskfactor_01.pdf)
*_race (2013-2019): 1=Non-hispanic white, 2=Non-hispanic black, 3=Non-hispanic Native American, 4=Non-hispanic asian, 5=Non-hispanic Native Hawaiian/Pacific Islander, 6=Non-hispanic other, 7=Non-hispanic multiracial, 8=Hispanic, 9=missing (https://www.cdc.gov/brfss/annual_data/2013/pdf/2013_calculated_variables_version15.pdf)


*Starting from 2001, Hispanic was no longer further partitioned into different racial groups (i.e., Hispanic-black, Hispanic-white...).
*Since I need to construct Hispanic non-black and Hispanic Black as two racial/ethnic groups in the data, I need to obtain the original racial and ethnic groups variables to compute these two groups myself.
*_prace (2001-2012): 1=White, 2=Black/African American, 3=Asian, 4=Native Hawaiian/Pacific Islander, 5=American Indian/Alaskan Native, 6=Other, 7=No preferred race, 8=Multiracial but no preferred race, 77=Don't know/not sure, 99=Refused (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*_prace1 (2013-2019): 1=White, 2=Black/African American, 3=American Indian/Alaskan Native, 4=Asian, 5=Native Hawaiian/Pacific Islander, 6=Other, 7=No preferred race, 8=Multiracial but no preferred race, 77=Don't know/not sure, 99=Refused (https://www.cdc.gov/brfss/annual_data/2013/pdf/CODEBOOK13_LLCP.pdf)
*hispanc2 (2001-2012): 1=Yes, 2=No, 7=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*_hispanc (2013-2019): 1=Hispanic/Latino/Spanish origin, 2=Not of Hispanic/Latino/Spanish origin, 9=Don't know/refused (https://www.cdc.gov/brfss/annual_data/2013/pdf/CODEBOOK13_LLCP.pdf)



*survey weight:
*_finalwt (1993-2010): 9999999999=unknown (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*_llcpwt (2011-2019): (https://www.cdc.gov/brfss/annual_data/2011/pdf/BRFSS2011_Analysis.pdf)


*strata:
*_ststr (1993-2019)

*PSU:
*_psu (1993-2019)

*Questionnaire version:
*_qstver (1997-2001): 1=Version 1, 2=Version 2, BLANK=Not applicable (https://www.cdc.gov/brfss/annual_data/1997/files/CODEBK97.TXT)
*_qstver (2002): No documentation and the categories are weird (https://www.cdc.gov/brfss/annual_data/2002/pdf/codebook_02.pdf)
*qstver (2003): 1=Version 1, 2=Version 2, BLANK=Missing (https://www.cdc.gov/brfss/annual_data/2003/pdf/Codebook_03.pdf)
*qstver (2004-2007): 0=Only version, 1=Version 1, 2=Version 2, BLANK=Missing (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*qstver (2008-2009): 0=Only version, 1=Version 1, 2=Version 2, 3=Version 3, BLANK=Missing (https://www.cdc.gov/brfss/annual_data/2008/pdf/codebook08.pdf)
*qstver (2010): 10=Only version, 11=Version 1, 12=Version 2, 13=Version 3 (https://www.cdc.gov/brfss/annual_data/2010/pdf/codebook_10.pdf)
*qstver (2011): 10=Only version, 11=Version 1, 12=Version 2, 13=Version 3, 20=Only Version Cell Phone (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*qstver (2012-2019): 10=Only version, 11=Version 1, 12=Version 2, 13=Version 3, 20=Only Version Cell Phone, 21=Version 1 Cell Phone, 22=Version 2 Cell Phone, 23=Version 3 Cell Phone (https://www.cdc.gov/brfss/annual_data/2012/pdf/CODEBOOK12_LLCP.pdf)



*age:
*age (1993-2012): 07=Don't know/not sure, 09=Refused
*_age80 (2013-2019): imputed age from 18 to 80


*employment status:
*employ (1993-2012):1=Employed, 2=Self-employed, 3=Out of work for more than 1 year, 4=Out of work for less than 1 year, 5=Homemaker, 6=Student, 7=Retired, 8=Unable to work, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*employ1 (2013-2019):1=Employed, 2=Self-employed, 3=Out of work for 1 year or more, 4=Out of work for less than 1 year, 5=Homemaker, 6=Student, 7=Retired, 8=Unable to work, 9=Refused (https://www.cdc.gov/brfss/annual_data/2013/pdf/CODEBOOK13_LLCP.pdf)

*sex:
*sex (1993-2017): 1=Male, 2=Female, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*sex1 (2018): 1=Male, 2=Female, 7=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/2018/pdf/codebook18_llcp-v2-508.pdf)
*sexvar (2019): 1=Male, 2=Female (https://www.cdc.gov/brfss/annual_data/2019/pdf/codebook19_llcp-v2-508.HTML)


*SRH:
*genhlth: 1=Excellent, 2=Very good, 3=Good, 4=Fair, 5=Poor, 7=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*In 1996, Tennessee (FIPS: 47) didn't ask the SRH question.

*Number of days in last month with not good physical health:
*physhlth: 1, 2, 3... 30 days, 77=Don't know/not sure, 88=None, 99=Refused (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)

*Number of days in last month with not good mental health:
*menthlth: 1, 2, 3... 30 days, 77=Don't know/not sure, 88=None, 99=Refused (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)

*Number of days in last month keeping R from usual activities due to poor physical/mental health:
*poorhlth: 1, 2, 3... 30 days, 77=Don't know/not sure, 88=None, 99=Refused (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)


*Asthma (ever told by doctor you have asthma):
*asthma (1999-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*asthma2 (2001-2010): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped or missing (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*asthma3 (2011-2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)

*Cholesterol (ever told by doctor you have high blood cholesterol):
*toldhi (1993-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*toldhi2 (2001-2005, 2007, 2009, 2011, 2013, 2015, 2017, 2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)

*Heart attack (ever told by doctor you have heart attack) (asked by all states since 2005):
*cvdinfar (1996-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*cvdinfr2 (2001-2004): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*cvdinfr3 (2005-2006): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2006/pdf/codebook_06.pdf)
*cvdinfr4 (2007-2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2012/pdf/CODEBOOK12_LLCP.pdf)


*Coronary heart disease (ever told by doctor you have coronary heart disease) (asked by all states since 2005):
*cvdcorhd (1996-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*cvdcrhd2 (2001-2004): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*cvdcrhd3 (2005-2006): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2006/pdf/codebook_06.pdf)
*cvdcrhd4 (2007-2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2012/pdf/CODEBOOK12_LLCP.pdf)


*Stroke (ever told by doctor you have stroke) (asked by all states since 2005):
*cvdstrok (1996-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*cvdstrk2 (2001-2004): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*cvdstrk3 (2005-2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2006/pdf/codebook_06.pdf)


*Diabetes (ever told by doctor you have diabetes):
*diabetes (1993): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*diabetes (1994-2003): 1=Yes, 2=Yes but during pregnancy, 3=No, 7=Don't know/Not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*diabete2 (2004-2010): 1=Yes, 2=Yes but during pregnancy, 3=No, 4=borderline diabetes, 7=Don't know/Not sure, 9=Refused, BLANK=skipped or missing (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*diabete3 (2011-2018): 1=Yes, 2=Yes but during pregnancy, 3=No, 4=borderline diabetes, 7=Don't know/Not sure, 9=Refused, BLANK=skipped or missing (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*diabete4 (2019): 1=Yes, 2=Yes but during pregnancy, 3=No, 4=borderline diabetes, 7=Don't know/Not sure, 9=Refused, BLANK=skipped or missing (https://www.cdc.gov/brfss/annual_data/2019/pdf/codebook19_llcp-v2-508.HTML)


*High blood pressure (ever told by doctor you have high blood pressure):
*bphigh (1993-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*bphigh2 (2001): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*bphigh3 (2002-2004): 1=Yes, 2=Yes but during pregnancy, 3=No, 7=Don't know/Not sure, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*bphigh4 (2005, 2007, 2009, 2011, 2013, 2015, 2017, 2019): 1=Yes, 2=Yes but during pregnancy, 3=No, 4=borderline high, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2017/pdf/codebook17_llcp-v2-508.pdf)


*last routine checkup:
*checkup (1993-2002, 2005-2006): 1=1 to 12 months ago, 2=1 to 2 years ago, 3=2 to 5 years ago, 4=5 or more years ago, 7=Don't know/not sure, 8=Never, 9=Refused (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*checkup1 (2007-2019): 1=less than 12 months ago, 2=1 to less than 2 years ago, 3=2 to less than 5 years ago, 4=5 or more years ago, 7=Don't know/not sure, 8=Never, 9=Refused (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)


*******************Alcohol drinking questions start*******************
*Any alcohol drinking in past 30 days:
*drinkany (1993-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*(no such question in 2001-2004, since it was combined with alcdays and alcday3. There were calculated variables based on alcdays and alcday3)
*drnkany4 (2005-2010): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)
*(no such question in 2011-2019, since it was combined with alcday5. There were calculated variables based on alcday5)


*Days of alcohol drinking in the last 30 days (asked by all states since 2001):
*alcohol (1993-2000): 101-107=Number of days per week (1 indicates weekly number), 201-231= Number of days per month (2 indicates monthly number), 777=Don't know/Not sure, 999=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*alcdays (2001): 101-107=Number of days per week (1 indicates weekly number), 201-230= Number of days per month (2 indicates monthly number), 777=Don't know/Not sure, 888=No drink, 999=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*alcday3 (2002-2004): 101-107=Number of days per week (1 indicates weekly number), 201-230= Number of days per month (2 indicates monthly number), 777=Don't know/Not sure, 888=No drink, 999=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2002/pdf/codebook_02.pdf)
*alcday4 (2005-2010): 101-107=Number of days per week (1 indicates weekly number), 201-230= Number of days per month (2 indicates monthly number), 777=Don't know/Not sure, 888=No drink, 999=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2010/pdf/codebook_10.pdf)
*alcday5 (2011-2019): 101-107=Number of days per week (1 indicates weekly number), 201-230= Number of days per month (2 indicates monthly number), 777=Don't know/Not sure, 888=No drink, 999=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*******************Alcohol drinking questions end*******************




*******************Smoking questions start*******************
*Ever smoked at least 100 cigarettes in life:
*smoke100 (1993-2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)



*Currently a smoker or not:
*smokenow (1993-1995): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/1995/pdf/layout95.pdf)
*smokeday (1996-2004): 1=Everyday, 2=Some days, 3=Not at all, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/1996/files/CODEBK96.TXT)
*smokday2 (2005-2019): 1=Everyday, 2=Some days, 3=Not at all, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)


*Number of cigarettes now smoke per day:
*smokenum (1993-2000): 1-87= Number of cigarettes (1 pack=20 cigarettes), 88=Don't smoke, 99=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*(This question was not asked since 2001)
*******************Smoking questions end*******************



*******************Fruit and vegetables questions start*******************
*How often drink fruit juice?
*fruitjui (1994, 1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 101-199=per day, 201-299=per week, 301-399=per month, 401-499=per year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)
*fruitju1 (2011-2013, 2015): 101-199=per day, 201-299=per week, 300=Less than one time per month, 301-399=per month, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*fruitju2 (2017, 2019): 101-199=per day, 201-299=per week, 300=Less than one time per month, 301-399=per month/year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2017/pdf/codebook17_llcp-v2-508.pdf)

*How often eat fruit?
*fruit (1994, 1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 101-199=per day, 201-299=per week, 301-399=per month, 401-499=per year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)
*fruit1 (2011, 2013, 2015): 101-199=per day, 201-299=per week, 300=Less than one time per month, 301-399=per month, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*fruit2 (2017, 2019): 101-199=per day, 201-299=per week, 300=Less than one time per month, 301-399=per month, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2017/pdf/codebook17_llcp-v2-508.pdf)

*How often eat green salad?
*greensal (1994, 1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 101-199=per day, 201-299=per week, 301-399=per month, 401-499=per year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)

*How often eat potatoes (not fried)?
*potatoes (1994, 1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 101-199=per day, 201-299=per week, 301-399=per month, 401-499=per year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)

*How often eat carrots?
*carrots (1994, 1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 101-199=per day, 201-299=per week, 301-399=per month, 401-499=per year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)

*How often eat vegetables?
*vegetabl (1994, 1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 101-199=per day, 201-299=per week, 301-399=per month, 401-499=per year, 555=Never, 777=Don’t know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)

*Number of daily servings of fruits and vegetables
*_frtserv (1994): 0-999.98=numbers of times per day, 999.99=Don’t know/refused/missing (https://www.cdc.gov/brfss/annual_data/1994/pdf/layout94.pdf)
*_frtserv (1996, 1998, 2000, 2002, 2003, 2005, 2007, 2009): 0-99998=numbers of times per day, 99999=Don’t know/refused/missing (last two digits correspond to two decimal places) (https://www.cdc.gov/brfss/annual_data/1996/files/CODEBK96.TXT)

*Number of fruits consumed per day (in the two following variables the last digit corresponds to one decimal place; the codebook mis-states that the last two digits correspond to two decimal places)
*_frutsum (2011, 2013, 2015): 0-99998=number of times per day, Blank=not asked/missing (the last digit corresponds to one decimal place) (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*_frutsu1 (2017, 2019): 0-99998=number of times per day, Blank=not asked/missing (the last digit corresponds to one decimal place) (https://www.cdc.gov/brfss/annual_data/2017/pdf/codebook17_llcp-v2-508.pdf)

*Number of vegetables consumed per day (in the two following variables the last digit corresponds to one decimal place; the codebook mis-states that the last two digits correspond to two decimal places)
*_vegesum (2011, 2013, 2015): 0-99998=number of times per day, Blank=not asked/missing (the last digit corresponds to one decimal place) (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*_vegesu1 (2017, 2019): 0-99998=number of times per day, Blank=not asked/missing (the last digit corresponds to one decimal place) (https://www.cdc.gov/brfss/annual_data/2017/pdf/codebook17_llcp-v2-508.pdf)
*******************Fruit and vegetables questions end*******************


*Flu shot last year?
*flushot (1993, 1995, 1997, 1999, 2001-2003): 1=Yes, 2=No, 7=Don’t know/not sure, 9=Refused, Blank=Skipped (https://www.cdc.gov/brfss/annual_data/2003/pdf/Codebook_03.pdf)
*flushot2 (2004): 1=Yes, 2=No, 7=Don’t know, 9=Refused (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*flushot3 (2005-2009): 1=Yes, 2=No, 7=Don’t know, 9=Refused (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)
*flushot4 (2010): 1=Yes, 2=No, 7=Don’t know, 9=Refused, Blank=Not asked or missing (https://www.cdc.gov/brfss/annual_data/2010/pdf/codebook_10.pdf)
*flushot5 (2011-2012): 1=Yes, 2=No, 7=Don’t know, 9=Refused, Blank=Not asked or missing (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*flushot6 (2013-2018): 1=Yes, 2=No, 7=Don’t know, 9=Refused, Blank=Not asked or missing (https://www.cdc.gov/brfss/annual_data/2013/pdf/CODEBOOK13_LLCP.pdf)
*flushot7 (2019): 1=Yes, 2=No, 7=Don’t know, 9=Refused, Blank=Not asked or missing (https://www.cdc.gov/brfss/annual_data/2019/pdf/codebook19_llcp-v2-508.HTML)


*Seat belt usage
*seatbelt (1993, 1995, 1997, 2002, 2006, 2008, 2010-2018): 1=Always, 2=Nearly always, 3=Sometimes, 4=Seldom, 5=Never, 7=Don’t know/not sure, 8=Never drive or ride in a car, 9=Refused, Blank=Skipped (https://www.cdc.gov/brfss/annual_data/2018/pdf/codebook18_llcp-v2-508.pdf)



*physical activities in last month:
*exerany (1993-2000): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*exerany2 (2001-2019): 1=Yes, 2=No, 7=Don't know/Not sure, 9=Refused, BLANK=skipped (https://www.cdc.gov/brfss/annual_data/2017/pdf/codebook17_llcp-v2-508.pdf)


*medcost (Was there a time during the last 12 months when you needed to see a doctor, but could not because of the cost?):
*medcost (1993-2001, 2003-2019): 1=Yes, 2=No, 7=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)

*healthcare plan (Do you have any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare?):
*hlthplan (1993-2010): 1=Yes, 2=No, 7=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*hlthpln1 (2011-2019): 1=Yes, 2=No, 7=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)


*Medicare (Do you have Medicare?):
*medicare (1996-1997): 1=Yes, 2=No, 7=Don't know/not sure, 9=Refused, BLANK=Skipped since answer to hlthplan=2,7,9 (https://www.cdc.gov/brfss/annual_data/1996/files/CODEBK96.TXT)
*medicar2 (1998-2000): 1=Yes, 2=No, 7=Don't know/not sure, 9=Refused, BLANK=Skipped since answer to hlthplan=2,7,9 (https://www.cdc.gov/brfss/annual_data/1998/pdf/Codebook_98.pdf)


*type of healthcare plan 1 (What type of health care coverage do you use to pay for most of your medical care? Is it coverage through:):
*typcovr1 (1996-2000): 1=Your employer, 2=Someone's employer, 3=A plan that you or someone else buys on your own, 4=Medicare, 5=Medicaid or medical assistance, 6=Military, CHAMPUS, or VA, 7=Indian Health Service, 8=Some other source, 77=Don't know/not sure, 88=None, 99=Refused, BLANK=Skipped since answer to hlthplan=2,7,9 (https://www.cdc.gov/brfss/annual_data/1996/files/CODEBK96.TXT)


*type of healthcare plan 2 (There are some types of coverage you may not have considered. Please tell me if you have any of the following: (Only persons HLTHPLAN=2)):
*typcovr2 (1996-2000): 1=Your employer, 2=Someone's employer, 3=A plan that you or someone else buys on your own, 4=Medicare, 5=Medicaid or medical assistance, 6=Military, CHAMPUS, or VA, 7=Indian Health Service, 8=Some other source, 77=Don't know/not sure, 88=None, 99=Refused, BLANK=Skipped since answer to hlthplan=2,7,9 (https://www.cdc.gov/brfss/annual_data/1996/files/CODEBK96.TXT)




*height:
*htf (height in feet) (1993-2000): 2-6=Number of feet, 7=7 feet or Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1994/pdf/layout94.pdf)
*htm (height in meters) (2001-2002): 999=Don't know/refused/missing (https://www.cdc.gov/brfss/annual_data/2002/pdf/codebook_02.pdf)
*htm2 (height in meters) (2003): 999=Don't know/refused/missing (https://www.cdc.gov/brfss/annual_data/2003/pdf/Codebook_03.pdf)
*htm3 (height in meters) (2004-2010): 999=Don't know/refused/missing (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*htm4 (height in meters) (2011-2019) (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*hti (height in inches) (1993-2000): 77=Don't know/not sure, 99=Refused (https://www.cdc.gov/brfss/annual_data/2000/pdf/codebook_00.pdf)
*htin (height in inches) (2001-2002): 99=Don't know/refused/missing (https://www.cdc.gov/brfss/annual_data/2002/pdf/codebook_02.pdf)
*htin2 (height in inches) (2003): 999=Don't know/refused/missing (https://www.cdc.gov/brfss/annual_data/2003/pdf/Codebook_03.pdf)
*htin3 (height in inches) (2004-2010): 999=Don't know/refused/missing (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*htin4 (height in inches) (2011-2019) (https://www.cdc.gov/brfss/annual_data/2011/pdf/CODEBOOK11_LLCP.pdf)
*height (height in feet and inches, 3-digit number) (1993-2003): 777=Don't know/not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/2001/pdf/codebook_01.pdf)
*height2 (height in feet and inches or in meters and centimeters, 3-digit number for feet and inches (<777), 4-digit number for meters and centimeters) (2004): 777=Don't know/not sure, 999=Refused, 7777=Don't know/not sure, 9999=Refused, BLANK=Missing (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)
*height3 (height in feet and inches or in meters and centimeters, 3-digit number for feet and inches, 4-digit number for meters and centimeters) (2005-2019): 7777=Don't know/not sure, 9999=Refused, BLANK=Not asked/missing (https://www.cdc.gov/brfss/annual_data/2005/pdf/Codebook_05.pdf)


*weight:
*weight (weight in pounds) (1993-2003): 777=Don't know/Not sure, 999=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*weight2 (weight in pounds or kilograms) (2004-2019): 3-digit number for weight in pounds, 4-digit number for weight in kilograms, 777=Don't know/not sure, 7777=Don't know/not sure, 9999=Refused (https://www.cdc.gov/brfss/annual_data/2004/pdf/Codebook_04.pdf)

*education:
*educa (1993-2019): 1=Never attended school/kindergarten only, 2=Elementary, 3=Some high school, 4=High school graduate, 5=Some college/technical school, 6=College graduate or more, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)


*income:
*income (1993): 1=less than 10K, 2=10K to less than 15K, 3=15K to less than 20K, 4=20K to less than 25K, 5= 25K to less than 35K, 6=35K to 50K, 7=Over 50K, 8=Don't know/not sure, 9=Refused (https://www.cdc.gov/brfss/annual_data/1993/pdf/layout93.pdf)
*income (1994): 1=less than 10K, 2=10K to less than 15K, 3=15K to less than 20K, 4=20K to less than 25K, 5= 25K to less than 35K, 6=35K to less than 50K, 7=50K to 75K, 8=Over 75K, 77=Don't know/not sure, 99=Refused (https://www.cdc.gov/brfss/annual_data/1994/pdf/layout94.pdf)
*income95 (1995): 1=less than 10K, 2=10K to 14999, 3=15K to 19999, 4=20K to 24999, 5= 25K to 34999, 6=35K to 49999, 7=50K to 74999, 8=Over 75K, 77=Don't know/not sure, 99=Refused (https://www.cdc.gov/brfss/annual_data/1995/pdf/layout95.pdf)
*income2 (1996-2019): 1=less than 10K, 2=10K to 14999, 3=15K to 19999, 4=20K to 24999, 5= 25K to 34999, 6=35K to 49999, 7=50K to 74999, 8=Over 75K, 77=Don't know/not sure, 99=Refused (https://www.cdc.gov/brfss/annual_data/1996/files/CODEBK96.TXT)


*County:
*ctycode (1993-2010): county code in numeric form
*ctycode1 (2011-2012): county code in numeric form


*The following state-years are missing (https://www.cdc.gov/brfss/annual_data/all_years/states_data.htm):
*Wyoming (FIPS: 56): 1993 (This is not documented)
*Rhode Island (FIPS: 44): 1994 (The documentation said that Rhode Island is missing in 1993, but it is wrong)
*Washington DC (FIPS: 11): 1995
*Hawaii (FIPS: 15): 2004
*New Jersey (FIPS: 34): 2019





* Drop respondents younger than 18 so that the minimum age is 18
* (missing ages are not < 18 in Stata, so they are kept, as before)
drop if _age80 < 18



*Health
* Self-rated health: reverse genhlth (1=Excellent ... 5=Poor) so that higher
* values mean better health. Any other code (7=don't know, 9=refused) is
* left missing.
local old "genhlth"
local new "srh"
generate `new' = 5 - `old' if inlist(`old', 1, 2, 3, 4, 5)
label variable `new' "Self-rated health"
label define `new' 0 "Poor" 1 "Fair" 2 "Good" 3 "VeryGood" 4 "Excellent"
label values `new' `new'
* Prefix each value label with its numeric code
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* Days of poor physical health: keep codes below 77 as they are, turn
* 88 (none) into 0, and leave everything else (77, 99, blank) missing.
local old "physhlth"
local new "badphyhlth"
generate `new' = cond(`old' == 88, 0, `old') if `old' < 77 | `old' == 88
label variable `new' "Number of days in last month with not good physical health"
notes `new': Original: `old' | `tag'


* Days of poor mental health: keep codes below 77 as they are, turn
* 88 (none) into 0, and leave everything else (77, 99, blank) missing.
local old "menthlth"
local new "badmenhlth"
generate `new' = cond(`old' == 88, 0, `old') if `old' < 77 | `old' == 88
label variable `new' "Number of days in last month with not good mental health"
notes `new': Original: `old' | `tag'


* Days on which poor health kept R from usual activities: keep codes below 77
* as they are, turn 88 (none) into 0, and leave everything else missing.
* NOTE(review): respondents for whom poorhlth is blank stay missing here,
* whereas actlim below codes some of them 0 - confirm this is intended.
local old "poorhlth"
local new "actpoorhlth"
generate `new' = cond(`old' == 88, 0, `old') if `old' < 77 | `old' == 88
label variable `new' "Number of days in last month keeping R from usual activities due to poor physical/mental health"
notes `new': Original: `old' | `tag'


* Asthma diagnosis indicator: 1=Yes -> 1, 2=No -> 0, all other codes missing
local old "asthma3"
local new "asthmadiag"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label variable `new' "Ever told you have asthma"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* High-cholesterol diagnosis indicator: 1=Yes -> 1, 2=No -> 0, all other codes missing
local old "toldhi2"
local new "cholesteroldiag"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label variable `new' "Ever told you have high cholesterol"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* Heart-attack diagnosis indicator: 1=Yes -> 1, 2=No -> 0, all other codes missing
local old "cvdinfr4"
local new "heartattackdiag"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label variable `new' "Ever told you have heart attack"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* Coronary-heart-disease diagnosis indicator: 1=Yes -> 1, 2=No -> 0, all other codes missing
local old "cvdcrhd4"
local new "heartdiseasediag"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label variable `new' "Ever told you have heart disease"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* Stroke diagnosis indicator: 1=Yes -> 1, 2=No -> 0, all other codes missing
local old "cvdstrk3"
local new "strokediag"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label variable `new' "Ever told you have stroke"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* Diabetes diagnosis indicator: 1=Yes -> 1; 3=No and 4=borderline -> 0.
* Code 2 (yes, but only during pregnancy) and 7/9/blank are left missing.
* NOTE(review): confirm that leaving code 2 missing is intended.
local old "diabete3"
local new "diabetesdiag"
generate `new' = (`old' == 1) if `old' == 1 | inrange(`old', 3, 4)
label variable `new' "Ever told you have diabetes"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* High-blood-pressure diagnosis indicator: 1=Yes -> 1; 3=No and
* 4=borderline high -> 0. Code 2 (yes, but only during pregnancy) and
* 7/9/blank are left missing.
* NOTE(review): confirm that leaving code 2 missing is intended.
local old "bphigh4"
local new "highbloodpressurediag"
generate `new' = (`old' == 1) if `old' == 1 | inrange(`old', 3, 4)
label variable `new' "Ever told you have high blood pressure"
label define `new' 0 "No" 1 "Yes"
label values `new' `new'
numlabel `new', mask(#) add
notes `new': Original: `old' | `tag'


* actlim: 1 when poorhlth is at least 14 and below 77; 0 when poorhlth is below 14 or equals 88,
* or when physhlth or menthlth equals 88 (the 1 coding wins whenever both could apply).
* 14 days cutoff from (Dwyer-Lindgren et al. 2015)
* NOTE(review): a 0 is assigned when EITHER physhlth or menthlth is 88, even if poorhlth is 77/99 - confirm intended
local new "actlim"
local old "poorhlth"
generate `new' = (`old' >= 14 & `old' < 77) ///
	if `old' < 77 | `old' == 88 | physhlth == 88 | menthlth == 88
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Frequent activity limitation"
note `new': Original: `old' | `tag'


* walkdiff: reverse-codes diffwalk as 2 minus the source value for source values below 7
* (source 1 -> 1 Yes, source 2 -> 0 No); values of 7 or more stay missing
local new "walkdiff"
local old "diffwalk"
generate `new' = cond(`old' < 7, 2 - `old', .)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Difficulty walking or climbing stairs"
note `new': Original: `old' | `tag'


* dressdiff: reverse-codes diffdres as 2 minus the source value for source values below 7
* (source 1 -> 1 Yes, source 2 -> 0 No); values of 7 or more stay missing
local new "dressdiff"
local old "diffdres"
generate `new' = cond(`old' < 7, 2 - `old', .)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Difficulty dressing or bathing"
note `new': Original: `old' | `tag'


* erranddiffalone: reverse-codes diffalon as 2 minus the source value for source values below 7
* (source 1 -> 1 Yes, source 2 -> 0 No); values of 7 or more stay missing
local new "erranddiffalone"
local old "diffalon"
generate `new' = cond(`old' < 7, 2 - `old', .)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Difficulty doing errands alone (e.g., shopping, visit doctor)"
note `new': Original: `old' | `tag'


* funclimit: additive index of the four 0/1 limitation flags created above.
* The sum is missing whenever any one of the four components is missing.
local new "funclimit"
local old1 "actlim"
local old2 "walkdiff"
local old3 "dressdiff"
local old4 "erranddiffalone"
generate `new' = `old1' + `old2' + `old3' + `old4'
label variable `new' "Functional limitation index"
note `new': Original: `old1' `old2' `old3' `old4' | `tag'






*Health behaviors
* bodycheck: reverses checkup1 (5 minus source for source values up to 4) and maps source 8 to 0;
* all other source values stay missing
local new "bodycheck"
local old "checkup1"
generate `new' = cond(`old' == 8, 0, 5 - `old') if `old' <= 4 | `old' == 8
label define `new' 0"Never" 1"5orMoreYearsAgo" 2"2-5YearsAgo" 3"1-2YearsAgo" 4"1-12MonthsAgo"
numlabel `new', mask(#_) add
label values `new' `new'
label variable `new' "Last time did body check up"
note `new': Original: `old' | `tag'


* alcoholnum: source 100-107 -> (source-100)/7, source 201-230 -> (source-200)/30, source 888 -> 0;
* everything else stays missing.
* NOTE(review): both branches yield a share of days (0 to 1) rather than a count of days - confirm this matches the label
local new "alcoholnum"
local old "alcday5"
generate `new' = cond(inrange(`old', 100, 107), (`old' - 100)/7, ///
	cond(inrange(`old', 201, 230), (`old' - 200)/30, ///
	cond(`old' == 888, 0, .)))
label variable `new' "Average number of days of alcohol drinking in last month"
note `new': Original: `old' | `tag'


* bingedrink: shifts _rfbing5 down by one for source values below 9 (source 1 -> 0 No, source 2 -> 1 Yes)
local new "bingedrink"
local old "_rfbing5"
generate `new' = cond(`old' < 9, `old' - 1, .)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Binge drinking in last 30 days"
note `new': Original: `old' | `tag'


* heavydrink: shifts _rfdrhv5 down by one for source values below 9 (source 1 -> 0 No, source 2 -> 1 Yes)
local new "heavydrink"
local old "_rfdrhv5"
generate `new' = cond(`old' < 9, `old' - 1, .)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Heavy drinking in last 30 days"
note `new': Original: `old' | `tag'



* smokeever: binary flag built from smoke100 (source 1 -> 1, source 2 -> 0, anything else missing)
local new "smokeever"
local old "smoke100"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Ever smoke at least 100 cigarettes in life"
note `new': Original: `old' | `tag'


* smoking: four-level smoking frequency combining smokday2 with the smokeever flag.
* Precedence (highest first): source 1 -> 3, source 2 -> 2, source 3 among ever-smokers -> 1,
* smokeever equal to 0 -> 0; anything else stays missing.
local new "smoking"
local old "smokday2"
generate `new' = cond(`old' == 1, 3, ///
	cond(`old' == 2, 2, ///
	cond(`old' == 3 & smokeever == 1, 1, ///
	cond(smokeever == 0, 0, .))))
label define `new' 0"Never" 1"NoSmokingReg" 2"SomeDays" 3"Everyday"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "How often smoke cigarettes"
note `new': Original: `old' | `tag'


* smokingsim: collapses smoking to three levels by merging codes 2 and 3 into 2
local new "smokingsim"
local old "smoking"
generate `new' = min(`old', 2) if inlist(`old', 0, 1, 2, 3)
label define `new' 0"Never" 1"NoSmokingReg" 2"SomeorEveryDay"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "How often smoke cigarettes"
note `new': Original: `old' | `tag'


* vegserv: _vegesum divided by 100, kept only where _vegetex equals 0.
* this variable includes dark green vegetables, french fries, potato, and other vegetables intake
* exclude those who reported 24 servings or more per day (too high, according to official codebook)
local new "vegserv"
local old "_vegesum"
generate `new' = cond(_vegetex == 0, `old'/100, .)
label variable `new' "Vegetables servings per day"
note `new': Original: `old' | `tag'


* veg1perday: 1 when vegserv is at least 1, 0 when it is below 1, missing when vegserv is missing
local new "veg1perday"
local old "vegserv"
generate `new' = (`old' >= 1) if !mi(`old')
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "1 or more vegetables servings per day"
note `new': Original: `old' | `tag'


* greenvegserv: grenday_ divided by 100, kept only for source values below 1000
* (a missing source is never below 1000, so it stays missing)
* exclude those who reported 10 servings or more per day (too high)
local new "greenvegserv"
local old "grenday_"
generate `new' = `old'/100 if `old' < 1000
label variable `new' "Dark green vegetables servings per day"
note `new': Original: `old' | `tag'


* greenveg1perday: 1 when greenvegserv is at least 1, 0 when below 1, missing when greenvegserv is missing
local new "greenveg1perday"
local old "greenvegserv"
generate `new' = (`old' >= 1) if !mi(`old')
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "1 or more green vegetables servings per day"
note `new': Original: `old' | `tag'



* othvegserv: vegeda1_ divided by 100, kept only for source values below 1000
* (a missing source is never below 1000, so it stays missing)
* exclude those who reported 10 servings or more per day (too high)
local new "othvegserv"
local old "vegeda1_"
generate `new' = `old'/100 if `old' < 1000
label variable `new' "Other vegetables servings per day"
note `new': Original: `old' | `tag'


* otherveg1perday: 1 when othvegserv is at least 1, 0 when below 1, missing when othvegserv is missing
local new "otherveg1perday"
local old "othvegserv"
generate `new' = (`old' >= 1) if !mi(`old')
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "1 or more other vegetables servings per day"
note `new': Original: `old' | `tag'


* totalfruitserv: _frutsum divided by 100; this variable includes fruit juice and fruit intake.
* Fix: the inclusion filter now uses the fruit exclusion flag (_fruitex) instead of the vegetable
* flag (_vegetex), which had been copied over from the vegserv stanza.
* exclude those who reported 17 servings or more per day (too high, according to official codebook)
* NOTE(review): assumes _fruitex is coded like _vegetex (0 = keep) - confirm against the 2015 codebook
local new "totalfruitserv"
local old "_frutsum"
gen `new'=`old'/100 if _fruitex==0
label var `new' "Total fruit servings per day"
note `new': Original: `old' | `tag'


* totalfruit1perday: 1 when totalfruitserv is at least 1, 0 when below 1, missing when it is missing
local new "totalfruit1perday"
local old "totalfruitserv"
generate `new' = (`old' >= 1) if !mi(`old')
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "1 or more total fruit servings per day"
note `new': Original: `old' | `tag'


* fruitjuiceserv: ftjuda1_ divided by 100 gives servings per day.
* Fix: the cutoff was 8000, which is 80 servings per day after the division by 100; the stated
* intent is 8 servings per day, i.e. a source value of 800 (the vegetable stanzas use 1000 for 10).
* exclude those who reported 8 servings or more per day (too high)
local new "fruitjuiceserv"
local old "ftjuda1_"
gen `new'=`old'/100 if !mi(`old') & `old'<800
label var `new' "Fruit juice servings per day"
note `new': Original: `old' | `tag'


* fruitjuice1perday: 1 when fruitjuiceserv is at least 1, 0 when below 1, missing when it is missing
local new "fruitjuice1perday"
local old "fruitjuiceserv"
generate `new' = (`old' >= 1) if !mi(`old')
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "1 or more fruit juice servings per day"
note `new': Original: `old' | `tag'


* fruitserv: frutda1_ divided by 100 gives servings per day.
* Fix: the cutoff was 8000, which is 80 servings per day after the division by 100; the stated
* intent is 8 servings per day, i.e. a source value of 800 (the vegetable stanzas use 1000 for 10).
* exclude those who reported 8 servings or more per day (too high)
local new "fruitserv"
local old "frutda1_"
gen `new'=`old'/100 if !mi(`old') & `old'<800
label var `new' "Fruit servings per day"
note `new': Original: `old' | `tag'


* fruit1perday: 1 when fruitserv is at least 1, 0 when below 1, missing when fruitserv is missing
local new "fruit1perday"
local old "fruitserv"
generate `new' = (`old' >= 1) if !mi(`old')
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "1 or more fruit servings per day"
note `new': Original: `old' | `tag'



* anyflushot: binary flag built from flushot6 (source 1 -> 1, source 2 -> 0, anything else missing)
local new "anyflushot"
local old "flushot6"
generate `new' = (`old' == 1) if inlist(`old', 1, 2)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Flu shot last year"
note `new': Original: `old' | `tag'


* seatbeltusage: reverses seatbelt (5 minus source) for source values 1 through 5, so that
* higher values mean more frequent use; other source values stay missing
local new "seatbeltusage"
local old "seatbelt"
generate `new' = 5 - `old' if inrange(`old', 1, 5)
label define `new' 0"Never" 1"Seldom" 2"Sometimes" 3"Nearly always" 4"Always"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Seat belt usage"
note `new': Original: `old' | `tag'


* weightinkg: converts weight2 to kilograms.
*   - source values up to 999 are treated as pounds and multiplied by 0.45359237
*   - source values strictly between 7777 and 9999 are treated as 9000 + kilograms
* Fix: the pounds branch used to stop at 776 (old<777), silently dropping respondents reporting
* 777-999 lb; the sentinel guarded against in the second branch is the four-digit 7777, so three-digit
* values are all legitimate pound weights.
* NOTE(review): pounds range 50-999 per the BRFSS codebook - confirm for this survey year
local new "weightinkg"
local old "weight2"
gen `new'=`old'*0.45359237 if `old'<=999
replace `new'=`old'-9000 if `old'>7777 & `old'<9999
label var `new' "Weight (kg)"
note `new': Original: `old' | `tag'


* heightinmeter: converts height3 to metres.
*   - source below 777: first character of the number is read as feet (divided by 3.2808) and
*     the next two characters as inches (divided by 39.37)
*   - source strictly between 7777 and 9230: read as 9000 + centimetres
local new "heightinmeter"
local old "height3"
generate `new' = real(substr(string(`old'), 1, 1))/3.2808 ///
	+ real(substr(string(`old'), 2, 2))/39.37 ///
	if `old' < 777
replace `new' = (`old' - 9000)/100 if `old' > 7777 & `old' < 9230
label variable `new' "Height (m)"
note `new': Original: `old' | `tag'


* bmi: _bmi5 divided by 100; a missing source yields a missing result
local new "bmi"
local old "_bmi5"
generate `new' = `old'/100
label variable `new' "Body mass index"
note `new': Original: `old' | `tag'


* bmicat: shifts _bmi5cat down by one for source values up to 4, giving codes 0 through 3
local new "bmicat"
local old "_bmi5cat"
generate `new' = cond(`old' <= 4, `old' - 1, .)
label define `new' 0"Underweight" 1"Normal" 2"Overweight" 3"Obese"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "BMI category"
note `new': Original: `old' | `tag'






*Physical activity
* exercise: binary flag built from exerany2 (source 1 -> 1, source 2 -> 0, anything else missing).
* Several later stanzas use this flag to zero-fill their activity measures.
local new "exercise"
local old "exerany2"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Any exercise (except regular job) last month"
note `new': Original: `old' | `tag'


* exercise1type: copy of exract11 for source values below 77; 77 and above stay missing
local new "exercise1type"
local old "exract11"
generate `new' = cond(`old' < 77, `old', .)
label variable `new' "Type of physical activity"
note `new': Original: `old' | `tag'


* exercise1freq: times per week for the 1st reported activity, from exeroft1.
* The source carries a unit prefix: 101-199 are used as per-week counts and 201-299 as
* per-month counts (divided by 4 to approximate weeks).
* Fix: the prefix (100 or 200) is now subtracted before use, mirroring how alcday5 is decoded in the
* alcoholnum stanza. Previously 103 was stored as 103 times per week and 212 as 53 times per week.
local new "exercise1freq"
local old "exeroft1"
gen `new'=`old'-100 if `old'>=101 & `old'<=199
replace `new'=(`old'-200)/4 if `old'>=201 & `old'<=299
label var `new' "Frequency of exercise (times per week)"
note `new': Original: `old' | `tag'


* exercise1min: duration in minutes from exerhmm1.
*   - source below 100 is taken as minutes as-is
*   - source 100-959 (except 777) is split into its hundreds part, multiplied by 60, plus the
*     remainder after division by 100
local new "exercise1min"
local old "exerhmm1"
generate `new' = cond(`old' < 100, `old', ///
	cond(inrange(`old', 100, 959) & `old' != 777, int(`old'/100)*60 + mod(`old', 100), .))
label variable `new' "Usual exercise duration (minutes)"
note `new': Original: `old' | `tag'


* exercise2type: copy of exract21 for source values below 77; 77 and above stay missing
local new "exercise2type"
local old "exract21"
generate `new' = cond(`old' < 77, `old', .)
label variable `new' "Type of physical activity"
note `new': Original: `old' | `tag'


* exercise2freq: times per week for the 2nd reported activity, from exeroft2.
* The source carries a unit prefix: 101-199 are used as per-week counts and 201-299 as
* per-month counts (divided by 4 to approximate weeks).
* Fix: the prefix (100 or 200) is now subtracted before use, mirroring how alcday5 is decoded in the
* alcoholnum stanza. Previously 103 was stored as 103 times per week and 212 as 53 times per week.
local new "exercise2freq"
local old "exeroft2"
gen `new'=`old'-100 if `old'>=101 & `old'<=199
replace `new'=(`old'-200)/4 if `old'>=201 & `old'<=299
label var `new' "Frequency of exercise (times per week)"
note `new': Original: `old' | `tag'


* exercise2min: duration in minutes from exerhmm2.
*   - source below 100 is taken as minutes as-is
*   - source 100-959 (except 777) is split into its hundreds part, multiplied by 60, plus the
*     remainder after division by 100
local new "exercise2min"
local old "exerhmm2"
generate `new' = cond(`old' < 100, `old', ///
	cond(inrange(`old', 100, 959) & `old' != 777, int(`old'/100)*60 + mod(`old', 100), .))
label variable `new' "Usual exercise duration (minutes)"
note `new': Original: `old' | `tag'


* muscleexercise: times per week of muscle-strengthening activity, from strength.
* The source carries a unit prefix: 101-199 are used as per-week counts and 201-299 as
* per-month counts (divided by 4 to approximate weeks).
* Fixes:
*   - the prefix (100 or 200) is now subtracted before use (previously 103 became 103 times per week)
*   - source 888 is coded as 0 times per week instead of being left missing, matching the
*     treatment of 888 in the alcoholnum stanza
* NOTE(review): 888 = never per the BRFSS codebook - confirm for this survey year
local new "muscleexercise"
local old "strength"
gen `new'=`old'-100 if `old'>=101 & `old'<=199
replace `new'=(`old'-200)/4 if `old'>=201 & `old'<=299
replace `new'=0 if `old'==888
label var `new' "Frequency of muscle-strengthening exercise (times per week)"
note `new': Original: `old' | `tag'


* exe_total_min: 0 for respondents whose exercise flag is 0; otherwise a copy of pa1min_
* (missing when pa1min_ is missing)
local new "exe_total_min"
local old1 "pa1min_"
local old2 "exercise"
generate `new' = 0 if `old2' == 0
replace `new' = `old1' if `old2' != 0 & !mi(`old1')
label variable `new' "Minutes of the two reported exercises per week (last month)"
note `new': Original: `old1' `old2' | `tag'


* exe1_intensity: actin11_ shifted up by one so that 0 can stand for no exercise;
* respondents whose exercise flag is 0 are coded 0
local new "exe1_intensity"
local old1 "actin11_"
local old2 "exercise"
generate `new' = cond(`old2' == 0, 0, `old1' + 1)
label define `new' 0"NoExercise" 1"LightExercise" 2"ModerateExercise" 3"VigorousExercise"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Intensity of 1st reported type of exercise in last month"
note `new': Original: `old1' `old2' | `tag'


* exe2_intensity: actin21_ shifted up by one so that 0 can stand for no exercise;
* respondents whose exercise flag is 0 are coded 0.
* Note that respondents who reported only the 1st but not 2nd exercise are coded as missing
local new "exe2_intensity"
local old1 "actin21_"
local old2 "exercise"
generate `new' = cond(`old2' == 0, 0, `old1' + 1)
label define `new' 0"NoExercise" 1"LightExercise" 2"ModerateExercise" 3"VigorousExercise"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Intensity of 2nd reported type of exercise in last month"
note `new': Original: `old1' `old2' | `tag'


* exe_intensity: ten-level combination of the two per-activity intensity codes.
*   0     both activities coded 0
*   1-3   only the 1st activity present (2nd missing): keeps the 1st activity's own code
*   4-9   both present: unordered pairs of light(1), moderate(2), vigorous(3)
* Any other combination stays missing.
local new "exe_intensity"
local old1 "exe1_intensity"
local old2 "exe2_intensity"
generate `new' = 0 if `old1' == 0 & `old2' == 0
replace `new' = `old1' if inlist(`old1', 1, 2, 3) & mi(`old2')
replace `new' = 4 if `old1' == 1 & `old2' == 1
replace `new' = 7 if `old1' == 2 & `old2' == 2
replace `new' = 9 if `old1' == 3 & `old2' == 3
replace `new' = 5 if (`old1' == 1 & `old2' == 2) | (`old1' == 2 & `old2' == 1)
replace `new' = 6 if (`old1' == 1 & `old2' == 3) | (`old1' == 3 & `old2' == 1)
replace `new' = 8 if (`old1' == 2 & `old2' == 3) | (`old1' == 3 & `old2' == 2)
label define `new' 0"NoExercise" 1"OneLightEx" 2"OneModEx" 3"OneVigEx" 4"TwoLightEx" ///
5"Light&ModEx" 6"Light&VigEx" 7"TwoModEx" 8"Mod&VigEx" 9"TwoVigEx"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Intensity of two reported types of exercise in last month"
note `new': Original: `old1' `old2' | `tag'


* exe_intensity_sim: collapses exe_intensity to seven levels: 0 stays 0, codes 1-4 merge into 1,
* and codes 5-9 become 2-6.
* Fix: the note used the old1/old2 macros left over from the preceding stanza; it now cites the
* old macro this stanza actually defines, so the recorded provenance is exe_intensity.
local new "exe_intensity_sim"
local old "exe_intensity"
gen `new'=0 if `old'==0
replace `new'=1 if `old'>=1 & `old'<=4
replace `new'=2 if `old'==5
replace `new'=3 if `old'==6
replace `new'=4 if `old'==7
replace `new'=5 if `old'==8
replace `new'=6 if `old'==9
label var `new' "Intensity of two reported types of exercise in last month"
label define `new' 0"NoExercise" 1"OneExOrTwoLight" 2"Light&ModEx" 3"Light&VigEx" 4"TwoModEx" 5"Mod&VigEx" 6"TwoVigEx"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Original: `old' | `tag'


* exe_intensity_simplest: collapses exe_intensity_sim to four levels: 0 stays 0, codes 1-4 merge
* into 1, code 5 becomes 2 and code 6 becomes 3.
* Fix: the note used the old1/old2 macros left over from an earlier stanza; it now cites the
* old macro this stanza actually defines, so the recorded provenance is exe_intensity_sim.
local new "exe_intensity_simplest"
local old "exe_intensity_sim"
gen `new'=0 if `old'==0
replace `new'=1 if `old'>=1 & `old'<=4
replace `new'=2 if `old'==5
replace `new'=3 if `old'==6
label var `new' "Intensity of two reported types of exercise in last month"
label define `new' 0"NoExercise" 1"TwoModExOrBelow" 2"Mod&VigEx" 3"TwoVigEx"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Original: `old' | `tag'



* exe1_freq: pafreq1_ divided by 1000. Precedence: a source value of 99000 is always missing;
* otherwise respondents whose exercise flag is 0 get 0; otherwise the scaled source value.
local new "exe1_freq"
local old1 "pafreq1_"
local old2 "exercise"
generate `new' = cond(`old1' == 99000, ., cond(`old2' == 0, 0, `old1'/1000))
label variable `new' "Frequency of 1st reported type of exercise per week (last month)"
note `new': Original: `old1' `old2' | `tag'


* exe2_freq: pafreq2_ divided by 1000, for the 2nd reported activity.
*   - set to 0 when the exercise flag is 0 or when exe2_intensity is missing
*   - set back to missing when the source equals 99000
* Fix: the variable label said "1st reported type" (copied from exe1_freq); it now says "2nd".
local new "exe2_freq"
local old1 "pafreq2_"
local old2 "exercise"
local old3 "exe2_intensity"
gen `new'=`old1'/1000 if !mi(`old1')
replace `new'=0 if `old2'==0 | mi(`old3') //Respondents who reported the 1st but not the 2nd exercise are coded as 0
replace `new'=. if `old1'==99000
label var `new' "Frequency of 2nd reported type of exercise per week (last month)"
note `new': Original: `old1' `old2' `old3' | `tag'


* exe1_min_per_week: _minac11 capped at 4000 (top code), with 0 for respondents whose
* exercise flag is 0; missing source values stay missing unless zero-filled.
local new "exe1_min_per_week"
local old1 "_minac11"
local old2 "exercise"
generate `new' = min(`old1', 4000) if !mi(`old1')   // Top code the responses to be 4000 minutes per week.
replace `new' = 0 if `old2' == 0
label variable `new' "Minutes of 1st reported type of exercise per week (last month)"
note `new': Original: `old1' `old2' | `tag'


* Quantile groupings of the weekly minutes: 4, 5, 6 and 10 groups
xtile exe1_min_quartile = exe1_min_per_week, nquantiles(4)
xtile exe1_min_quintile = exe1_min_per_week, nquantiles(5)
xtile exe1_min_sextile  = exe1_min_per_week, nquantiles(6)
xtile exe1_min_dectile  = exe1_min_per_week, nquantiles(10)


* exe2_min_per_week: _minac21 capped at 4000 (top code), with 0 when the exercise flag is 0
* or when exe2_intensity is missing.
local new "exe2_min_per_week"
local old1 "_minac21"
local old2 "exercise"
local old3 "exe2_intensity"
generate `new' = min(`old1', 4000) if !mi(`old1')   // Top code the responses to be 4000 minutes per week.
replace `new' = 0 if `old2' == 0 | mi(`old3')       // Respondents who reported the 1st but not the 2nd exercise are coded as 0
label variable `new' "Minutes of 2nd reported type of exercise per week (last month)"
note `new': Original: `old1' `old2' `old3' | `tag'

* Quantile groupings of the weekly minutes: 4, 5, 6 and 10 groups
xtile exe2_min_quartile = exe2_min_per_week, nquantiles(4)
xtile exe2_min_quintile = exe2_min_per_week, nquantiles(5)
xtile exe2_min_sextile  = exe2_min_per_week, nquantiles(6)
xtile exe2_min_dectile  = exe2_min_per_week, nquantiles(10)


* strength_exe_freq: strfreq_ divided by 1000, with a source value of 99000 left missing
local new "strength_exe_freq"
local old "strfreq_"
generate `new' = `old'/1000 if `old' != 99000
label variable `new' "Frequency of muscle strengthening exercise (times per week, last month)"
note `new': Original: `old' | `tag'

* only tertile can be calculated since the variable's distribution is very sparse.
xtile strength_time_tertile = strength_exe_freq, nquantiles(3)




* exe_vigeq_min_per_week: 0 for respondents whose exercise flag is 0; otherwise a copy of pa1min_
* (missing when pa1min_ is missing). Built from the same source and rule as exe_total_min.
local new "exe_vigeq_min_per_week"
local old1 "pa1min_"
local old2 "exercise"
generate `new' = 0 if `old2' == 0
replace `new' = `old1' if `old2' != 0 & !mi(`old1')
label variable `new' "Vigorous equivalent minutes of exercise per week (last month)"
note `new': Original: `old1' `old2' | `tag'


* exe_vig_min_per_week: 0 for respondents whose exercise flag is 0; otherwise a copy of pa1vigm_
* (missing when pa1vigm_ is missing)
local new "exe_vig_min_per_week"
local old1 "pa1vigm_"
local old2 "exercise"
generate `new' = 0 if `old2' == 0
replace `new' = `old1' if `old2' != 0 & !mi(`old1')
label variable `new' "Vigorous minutes of exercise per week (last month)"
note `new': Original: `old1' `old2' | `tag'


* exe_active: reverses _pacat1 (4 minus source) for source values below 9, so higher means more active.
* Note: this variable is not recommended to be used since respondents who only reported 1 type of
* exercise (about 2524) were excluded due to lack of data to classify their overall exercise time per week
local new "exe_active"
local old "_pacat1"
generate `new' = cond(`old' < 9, 4 - `old', .)
label define `new' 0"Inactive" 1"InsufficientlyActive" 2"Active" 3"HighlyActive"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Sufficient exercise per week in last month"
note `new': Original: `old' Exercise per week guideline reference: https://www.ncbi.nlm.nih.gov/books/NBK566046/ | `tag'


* exe_aerobic_advice: reverses _pa150r2 (3 minus source) for source values below 9
local new "exe_aerobic_advice"
local old "_pa150r2"
generate `new' = cond(`old' < 9, 3 - `old', .)
label define `new' 0"NoAerobicExercise" 1"Under149MinEx" 2"150+MinEx"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Meet aerobic exercise recommendations (150+ min per week) last month"
note `new': Original: `old' | `tag'


* exe_strength_advice: reverses _pastrng (2 minus source) for source values below 9
* (source 1 -> 1 Yes, source 2 -> 0 No)
local new "exe_strength_advice"
local old "_pastrng"
generate `new' = cond(`old' < 9, 2 - `old', .)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Meet muscle strengthening recommendations (at least 2 times per week) last month"
note `new': Original: `old' | `tag'


* exe_aero_strength_advice: reverses _parec1 (4 minus source) for source values below 9, giving
* 0 Neither through 3 Both.
* Fix: corrected the misspelling "areobic" in the variable label, which is carried into every
* table and graph that uses this variable.
local new "exe_aero_strength_advice"
local old "_parec1"
gen `new'=4-`old' if `old'<9
label var `new' "Meet aerobic and muscle strengthening recommendations last month"
label define `new' 0"Neither" 1"StrengtheningOnly" 2"AerobicOnly" 3"Both"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Original: `old' | `tag'


* metval1: metvl11_ for source values up to 128; respondents whose exercise flag is 0 get 0
* (assign those who reported no regular exercise with 0 MET)
local new "metval1"
local old1 "metvl11_"
local old2 "exercise"
generate `new' = cond(`old2' == 0, 0, cond(`old1' <= 128, `old1', .))
label variable `new' "Metabolic equivalents (METs) of the 1st reported exercise"
note `new': Original: `old1' `old2' | `tag'


* metval2: MET value of the 2nd reported exercise, for source values up to 128; respondents
* whose exercise flag is 0 get 0.
* Fix: the source was metvl11_ (the 1st activity, copied from the metval1 stanza), which made
* metval2 a duplicate of metval1 and metval_avg equal to metval1. It now reads metvl21_,
* following the 11/21 naming used by actin11_/actin21_ and _minac11/_minac21 in this file.
local new "metval2"
local old1 "metvl21_"
local old2 "exercise"
gen `new'=`old1' if `old1'<=128
replace `new'=0 if `old2'==0 //assign those who reported no regular exercise with 0 MET
label var `new' "Metabolic equivalents (METs) of the 2nd reported exercise"
note `new': Original: `old1' `old2' | `tag'


* metval_avg: simple mean of metval1 and metval2; missing when either component is missing.
* Fix: the note cited the old macro, which this stanza never sets (it still held the source
* name from an earlier stanza); it now cites old1 and old2, the two variables actually averaged.
local new "metval_avg"
local old1 "metval1"
local old2 "metval2"
gen `new'=(`old1'+`old2')/2
label var `new' "Average metabolic equivalents (METs) of the 2 reported exercises"
note `new': Original: `old1' `old2' | `tag'


/*
local new "metval_quartile"
local old "metval_avg"
xtile `new'=`old', nq(4)
label var `new' "Average metabolic equivalents (METs) of the 2 reported exercises"
label define `new' 1"1stQuartile" 2"2ndQuartile" 3"3rdQuartile" 4"4thQuartile"
numlabel `new', mask(#) add
label value `new' `new'
local ivarlist `ivarlist' `new'
note `new': Original: `old' | `tag'
*/




*Healthcare
* afforddoc: binary flag built from medcost (source 1 -> 1, source 2 -> 0, anything else missing)
local new "afforddoc"
local old "medcost"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Cannot see doc due to cost in last 12 months"
note `new': Original: `old' | `tag'


* hlthcare: binary flag built from hlthpln1 (source 1 -> 1, source 2 -> 0, anything else missing)
local new "hlthcare"
local old "hlthpln1"
generate `new' = (`old' == 1) if inlist(`old', 1, 2)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Any healthcare coverage (either private or public)?"
note `new': Original: `old' | `tag'


* hlthcare1864: binary flag built from _hcvu651 (source 1 -> 1, source 2 -> 0, anything else missing)
local new "hlthcare1864"
local old "_hcvu651"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label define `new' 0"No" 1"Yes"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Any healthcare coverage? (18-64 years old only)"
note `new': Original: `old' | `tag'





*Sociodemographic variables
* oxygen_max: maxvo2_ divided by 100 for source values below 99900; others stay missing
local new "oxygen_max"
local old "maxvo2_"
generate `new' = cond(`old' < 99900, `old'/100, .)
label variable `new' "Estimated Maximum Oxygen Consumption (mL/kg/min)"
note `new': Original: `old' | `tag'


* yearsold: copy of _age80 for source values up to 80; anything larger (or missing) stays missing
local new "yearsold"
local old "_age80"
generate `new' = cond(`old' <= 80, `old', .)
label variable `new' "Age"
note `new': Original: `old' | `tag'



* agegroup: thirteen age bands from yearsold; the first band covers 18-20 and the rest are
* five-year bands ending at 80
local new "agegroup"
local old "yearsold"
generate `new' = 0 if inrange(`old', 18, 20)
replace `new' = 1  if inrange(`old', 21, 25)
replace `new' = 2  if inrange(`old', 26, 30)
replace `new' = 3  if inrange(`old', 31, 35)
replace `new' = 4  if inrange(`old', 36, 40)
replace `new' = 5  if inrange(`old', 41, 45)
replace `new' = 6  if inrange(`old', 46, 50)
replace `new' = 7  if inrange(`old', 51, 55)
replace `new' = 8  if inrange(`old', 56, 60)
replace `new' = 9  if inrange(`old', 61, 65)
replace `new' = 10 if inrange(`old', 66, 70)
replace `new' = 11 if inrange(`old', 71, 75)
replace `new' = 12 if inrange(`old', 76, 80)
label define `new' 0"18-20" 1"21-25" 2"26-30" 3"31-35" 4"36-40" 5"41-45" 6"46-50" 7"51-55" 8"56-60" 9"61-65" 10"66-70" 11"71-75" 12"76-80"
numlabel `new', mask(#_) add
label values `new' `new'
label variable `new' "Age Group"
note `new': Original: `old' | `tag'


* agegroup_sim: three broad age bands built from the agegroup codes (0-4, 5-9, 10-12)
local new "agegroup_sim"
local old "agegroup"
generate `new' = cond(inrange(`old', 0, 4), 0, ///
	cond(inrange(`old', 5, 9), 1, ///
	cond(inrange(`old', 10, 12), 2, .)))
label define `new' 0"18-40" 1"41-65" 2"66-80"
numlabel `new', mask(#_) add
label values `new' `new'
label variable `new' "Age Group"
note `new': Original: `old' | `tag'


* educ: four education levels from educa; source values 1 through 3 merge into 0 and
* source 4, 5, 6 become 1, 2, 3
local new "educ"
local old "educa"
generate `new' = 0 if inrange(`old', 1, 3)
replace `new' = `old' - 3 if inlist(`old', 4, 5, 6)
label define `new' 0"BelowHighSch" 1"HighSch" 2"SomeCollege" 3"CollegeorAbove"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Education"
note `new': Original: `old' | `tag'


* edu: three education levels from educa; source values 1 through 4 merge into 0 and
* source 5, 6 become 1, 2
local new "edu"
local old "educa"
generate `new' = 0 if inrange(`old', 1, 4)
replace `new' = `old' - 4 if inlist(`old', 5, 6)
label define `new' 0"HighSchorBelow" 1"SomeCollege" 2"CollegeorAbove"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Education"
note `new': Original: `old' | `tag'


* employstatus_full: six employment categories from employ1; source 1-2 merge into 0,
* source 3-4 merge into 1, and source 5, 6, 7, 8 become 2, 3, 4, 5
local new "employstatus_full"
local old "employ1"
generate `new' = 0 if inrange(`old', 1, 2)
replace `new' = 1 if inrange(`old', 3, 4)
replace `new' = `old' - 3 if inlist(`old', 5, 6, 7, 8)
label define `new' 0"Employed" 1"Unemployed" 2"Homemaker" 3"Student" 4"Retired" 5"UnabletoWork"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Employment Status"
note `new': Original: `old' | `tag'



* employstatus: three employment categories from employ1 (source 1-2 -> 0, 3-4 -> 1, 5-8 -> 2)
local new "employstatus"
local old "employ1"
generate `new' = cond(inrange(`old', 1, 2), 0, ///
	cond(inrange(`old', 3, 4), 1, ///
	cond(inrange(`old', 5, 8), 2, .)))
label define `new' 0"Employed" 1"Unemployed" 2"Other"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Employment Status"
note `new': Original: `old' | `tag'


* male: indicator built from sex (source 1 -> 1 Male, source 2 -> 0 Female, anything else missing)
local new "male"
local old "sex"
generate `new' = 2 - `old' if inlist(`old', 1, 2)
label define `new' 0"Female" 1"Male"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Sex"
note `new': Original: `old' | `tag'


* hispanic: reverses _hispanc (2 minus source) for source values below 9
* (source 1 -> 1 Hispanic, source 2 -> 0 Non-Hispanic)
local new "hispanic"
local old "_hispanc"
generate `new' = cond(`old' < 9, 2 - `old', .)
label define `new' 0"Non-Hispanic" 1"Hispanic"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Hispanic"
note `new': Original: `old' | `tag'


* prace: six preferred-race categories from _prace1; source 1 through 5 shift down by one and
* source 6 through 8 merge into 5 (Other)
local new "prace"
local old "_prace1"
generate `new' = `old' - 1 if inlist(`old', 1, 2, 3, 4, 5)
replace `new' = 5 if inrange(`old', 6, 8)
label define `new' 0"White" 1"Black" 2"NativeAm" 3"Asian" 4"NativeHawaiianPacIslander" 5"Other"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Preferred Racial Group"
note `new': Original: `old' | `tag'


* praceethnicgpf: nine-level race/ethnicity grouping crossing prace with hispanic.
* White, Black and NHPI (prace 0, 1, 4) are split by the hispanic flag; NativeAm, Asian and
* Other (prace 2, 3, 5) are assigned from prace alone.
* Note that those who didn't respond to Hispanic question are missing in this variable
* (this applies to the categories that are split by the hispanic flag).
* Fix: the note cited the old macro, which this stanza never sets (it still held the source
* name from the prace stanza); it now cites old1 and old2, the two variables actually combined.
local new "praceethnicgpf"
local old1 "prace"
local old2 "hispanic"
gen `new'=0 if `old1'==0 & `old2'==0
replace `new'=1 if `old1'==1 & `old2'==0
replace `new'=2 if `old1'==1 & `old2'==1
replace `new'=3 if `old1'==0 & `old2'==1
replace `new'=4 if `old1'==2
replace `new'=5 if `old1'==3
replace `new'=6 if `old1'==4 & `old2'==0
replace `new'=7 if `old1'==4 & `old2'==1
replace `new'=8 if `old1'==5
label var `new' "Racial/ethnic Group"
label define `new' 0"Non-HispanicWhite" 1"Non-HispanicBlack" 2"HispanicBlack" 3"HispanicWhite" 4"NativeAm" ///
5"Asian" 6"Non-HispanicNHPI" 7"HispanicNHPI" 8"Other"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Original: `old1' `old2' | `tag'


* praceethnicgp: seven-level regrouping of praceethnicgpf; codes 0-3 are kept, codes 6 and 7
* become 4 and 5, and codes 4, 5 and 8 merge into 6 (Other).
* Note that those who didn't respond to Hispanic question are missing in this variable
local new "praceethnicgp"
local vold "praceethnicgpf"
generate `new' = `vold' if inlist(`vold', 0, 1, 2, 3)
replace `new' = `vold' - 2 if inlist(`vold', 6, 7)
replace `new' = 6 if inlist(`vold', 4, 5, 8)
label define `new' 0"WhiteNon-Hispanic" 1"Non-HispanicBlack" 2"HispanicBlack" ///
3"HispanicWhite" 4"Non-HispanicNHPI" 5"HispanicNHPI" 6"Other"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Racial/ethnic Group"
note `new': Refer to `vold' | `tag'


* praceethnicgp_sim: five-level race/ethnicity grouping built from praceethnicgpf, falling back
* on prace so that rows with a prace value still get a category when praceethnicgpf is missing.
* Note that I force those who responded to race question but not Hispanic question to be either Black, NHPI, or other
* Fix: the note cited the vold macro, which this stanza never sets (it still held the value
* from the praceethnicgp stanza); it now cites vold1 and vold2, the two variables actually used.
local new "praceethnicgp_sim"
local vold1 "praceethnicgpf"
local vold2 "prace"
gen `new'=0 if `vold1'==0
replace `new'=1 if `vold1'==1 | `vold1'==2 | `vold2'==1
replace `new'=2 if `vold1'==3
replace `new'=3 if `vold1'==6 | `vold1'==7 | `vold2'==4
replace `new'=4 if `vold1'==4 | `vold1'==5 | `vold1'==8 | `vold2'==2 | `vold2'==3 | `vold2'==5
label var `new' "Racial/ethnic Group"
label define `new' 0"WhiteNon-Hispanic" 1"Black" 2"HispanicWhite" 3"NHPI" 4"Other"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Refer to `vold1' `vold2' | `tag'




* mrace: seven race categories from _mrace1; source values 1 through 7 shift down by one,
* everything else stays missing
local new "mrace"
local old "_mrace1"
generate `new' = `old' - 1 if inlist(`old', 1, 2, 3, 4, 5, 6, 7)
label define `new' 0"White" 1"Black" 2"NativeAm" 3"Asian" 4"NativeHawaiianPacIslander" 5"Other" 6"Multiracial"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Multiracial Racial Categorization"
note `new': Original: `old' | `tag'


* mraceethnicgpf: nine-level race/ethnicity grouping crossing mrace with hispanic.
* White, Black and NHPI (mrace 0, 1, 4) are split by the hispanic flag; NativeAm, Asian and
* Other (mrace 2, 3, 5) are assigned from mrace alone. mrace code 6 is not assigned to any
* category here and therefore stays missing.
* Note that those who didn't respond to Hispanic question are missing in this variable
* (this applies to the categories that are split by the hispanic flag).
* Fix: the note cited the old macro, which this stanza never sets (it still held the source
* name from the mrace stanza); it now cites old1 and old2, the two variables actually combined.
local new "mraceethnicgpf"
local old1 "mrace"
local old2 "hispanic"
gen `new'=0 if `old1'==0 & `old2'==0
replace `new'=1 if `old1'==1 & `old2'==0
replace `new'=2 if `old1'==1 & `old2'==1
replace `new'=3 if `old1'==0 & `old2'==1
replace `new'=4 if `old1'==2
replace `new'=5 if `old1'==3
replace `new'=6 if `old1'==4 & `old2'==0
replace `new'=7 if `old1'==4 & `old2'==1
replace `new'=8 if `old1'==5
label var `new' "Racial/ethnic Group"
label define `new' 0"Non-HispanicWhite" 1"Non-HispanicBlack" 2"HispanicBlack" 3"HispanicWhite" 4"NativeAm" ///
5"Asian" 6"Non-HispanicNHPI" 7"HispanicNHPI" 8"Other"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Original: `old1' `old2' | `tag'


* mraceethnicgp: seven-level regrouping of mraceethnicgpf; codes 0-3 are kept, codes 6 and 7
* become 4 and 5, and codes 4, 5 and 8 merge into 6 (Other).
* Note that those who didn't respond to Hispanic question are missing in this variable
local new "mraceethnicgp"
local vold "mraceethnicgpf"
generate `new' = `vold' if inlist(`vold', 0, 1, 2, 3)
replace `new' = `vold' - 2 if inlist(`vold', 6, 7)
replace `new' = 6 if inlist(`vold', 4, 5, 8)
label define `new' 0"WhiteNon-Hispanic" 1"Non-HispanicBlack" 2"HispanicBlack" ///
3"HispanicWhite" 4"Non-HispanicNHPI" 5"HispanicNHPI" 6"Other"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Racial/ethnic Group"
note `new': Refer to `vold' | `tag'


* multiracial_mark: flags rows where _prace1 and _mrace1 disagree.
* Only rows with _prace1 below 77 are coded: 0 when the two source codes are equal, 1 otherwise.
* Fix: the note cited the old macro, which this stanza never sets (it still held the source
* name from the mrace stanza); it now cites old1 and old2, the two variables actually compared.
local new "multiracial_mark"
local old1 "_prace1"
local old2 "_mrace1"
gen `new'=0 if `old1'==`old2' & `old1'<77
replace `new'=1 if `old1'!=`old2' & `old1'<77
label var `new' "Multiracial Marker"
label define `new' 0"SingleRaceOrNoPreferred" 1"PreferredRaceandMultiracial"
numlabel `new', mask(#) add
label value `new' `new'
note `new': Original: `old1' `old2' | `tag'


* praceethnicgp_multi: crosses the five praceethnicgp_sim groups with the 0/1 multiracial marker.
* The code is twice the group code plus the marker, so each group gets an even code for
* Single and the following odd code for Multi.
* Note that I force those who responded to race question but not Hispanic question to be either Black, NHPI, or other
local new "praceethnicgp_multi"
local vold1 "praceethnicgp_sim"
local vold2 "multiracial_mark"
generate `new' = 2*`vold1' + `vold2' if inlist(`vold1', 0, 1, 2, 3, 4) & inlist(`vold2', 0, 1)
label define `new' 0"nHWhite_Single" 1"nHwhite_Multi" 2"Black_Single" 3"Black_Multi" ///
4"HWhite_Single" 5"HWhite_Multi" 6"NHPI_Single" 7"NHPI_Multi" 8"Other_Single" 9"Other_Multi"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Racial/ethnic Group"
note `new': Refer to `vold1' `vold2' | `tag'


* praceethnicgp_multi_sim: five-level regrouping of praceethnicgp_multi; codes 0 and 1 are kept,
* codes 6 and 7 become 2 and 3, and codes 2-5 and 8-9 merge into 4 (Other).
* Note that I force those who responded to race question but not Hispanic question to be either Black, NHPI, or other
local new "praceethnicgp_multi_sim"
local vold "praceethnicgp_multi"
generate `new' = `vold' if inlist(`vold', 0, 1)
replace `new' = `vold' - 4 if inlist(`vold', 6, 7)
replace `new' = 4 if inrange(`vold', 2, 5) | inrange(`vold', 8, 9)
label define `new' 0"nHWhite_Single" 1"nHwhite_Multi" 2"NHPI_Single" 3"NHPI_Multi" 4"Other"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Racial/ethnic Group"
note `new': Refer to `vold' | `tag'


* praceethnicgp_multi_sim2: four-level regrouping of praceethnicgp_multi_sim; codes 0 and 1
* merge into 0, and codes 2, 3, 4 become 1, 2, 3.
* Note that I force those who responded to race question but not Hispanic question to be either Black, NHPI, or other
local new "praceethnicgp_multi_sim2"
local vold "praceethnicgp_multi_sim"
generate `new' = max(`vold' - 1, 0) if inlist(`vold', 0, 1, 2, 3, 4)
label define `new' 0"nHWhite" 1"NHPI_Single" 2"NHPI_Multi" 3"Other"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Racial/ethnic Group"
note `new': Refer to `vold' | `tag'


* praceethnicgp_multi_altsim: nine-level regrouping of praceethnicgp_multi that merges codes
* 4 and 5 into a single code 4; codes 0-3 are kept and codes 6-9 shift down by one.
* Note that I force those who responded to race question but not Hispanic question to be either Black, NHPI, or other
local new "praceethnicgp_multi_altsim"
local vold "praceethnicgp_multi"
generate `new' = `vold' if inlist(`vold', 0, 1, 2, 3)
replace `new' = 4 if inlist(`vold', 4, 5)
replace `new' = `vold' - 1 if inlist(`vold', 6, 7, 8, 9)
label define `new' 0"nHWhite_Single" 1"nHwhite_Multi" 2"Black_Single" 3"Black_Multi" 4"HWhite" 5"NHPI_Single" 6"NHPI_Multi" 7"Other_Single" 8"Other_Multi"
numlabel `new', mask(#) add
label values `new' `new'
label variable `new' "Racial/ethnic Group"
note `new': Refer to `vold' | `tag'


* praceethnicgp_multi_simplest: four-group collapse of praceethnicgp_multi.
*   parent 0 -> 0, 1 -> 1, 6-7 -> 2 ("NHPI"), 2-5 and 8-9 -> 3 ("Other").
*   Any other parent value, including missing, is left missing.
* Author's note carried over: those who responded to the race question but not
*   the Hispanic question are forced to be either Black, NHPI, or other
*   (not done in this stanza -- presumably upstream; verify).
local derived "praceethnicgp_multi_simplest"
local parent  "praceethnicgp_multi"
generate `derived' = cond(inlist(`parent', 0, 1), `parent', ///
	cond(inrange(`parent', 6, 7), 2, ///
	cond(inrange(`parent', 2, 5) | inrange(`parent', 8, 9), 3, .)))
label variable `derived' "Racial/ethnic Group"
label define `derived' ///
	0 "nHWhite_Single" ///
	1 "nHwhite_Multi" ///
	2 "NHPI" ///
	3 "Other"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Refer to `parent' | `tag'



* hhincome: household income bracket. income2 codes 1-8 become 0-7;
*   every other income2 value (and missing) is left missing.
local derived "hhincome"
local source  "income2"
generate `derived' = 0 if `source' == 1
forvalues code = 2/8 {
	replace `derived' = `code' - 1 if `source' == `code'
}
label variable `derived' "Household Income"
label define `derived' ///
	0 "<10000" ///
	1 "10000-14999" ///
	2 "15000-19999" ///
	3 "20000-24999" ///
	4 "25000-34999" ///
	5 "35000-49999" ///
	6 "50000-74999" ///
	7 "75000+"
* This stanza uses the mask #_ (code followed by an underscore), unlike the
* plain # mask used for the other variables in this section.
numlabel `derived', add mask(#_)
label values `derived' `derived'
notes `derived': Original: `source' | `tag'


* sexualorientation: sxorient shifted to start at 0 (source value minus 1)
*   for every source value <= 4. Larger values are left missing; missing
*   source values also stay missing because Stata sorts missing above all numbers.
local derived "sexualorientation"
local source  "sxorient"
generate `derived' = cond(`source' <= 4, `source' - 1, .)
label variable `derived' "Sexual orientation"
label define `derived' ///
	0 "Heterosexual" ///
	1 "Homosexual" ///
	2 "Bisexual" ///
	3 "Other"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'


* sexualorientation_sim: two-group version of sexualorientation.
*   0 stays 0 ("Heterosexual"); 1 through 3 become 1 ("Other");
*   anything else, including missing, is left missing.
local derived "sexualorientation_sim"
local source  "sexualorientation"
generate `derived' = cond(`source' == 0, 0, cond(inrange(`source', 1, 3), 1, .))
label variable `derived' "Sexual orientation"
label define `derived' 0 "Heterosexual" 1 "Other"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'




*Technical variables
* svyweight: analysis-name copy of the survey weight _llcpwt (used as the
*   pweight in the svyset call further down).
* Stored as double on purpose: -generate- without a storage type creates a
*   float (about 7 significant digits), which would round a non-integer
*   weight. This matches the -gen double- already used for psu below.
local new "svyweight"
local old "_llcpwt"
gen double `new'=`old'
label var `new' "Survey weight"
note `new': Original: `old' | `tag'


* Straight copies of design/administrative fields under analysis names.
* The three lists below are parallel (same position = same variable):
*   strata <- _ststr, psu <- _psu, state <- _state, month <- imonth, year <- iyear.
* Variables are created in list order. psu is the only one given an explicit
* storage type (double); the rest take -generate-'s default type.
local targets  strata psu state month year
local sources  _ststr _psu _state imonth iyear
local captions `""Strata" "PSU" "State FIPS Code" "Interview Month" "Interview Year""'
local ncopies : word count `targets'
forvalues i = 1/`ncopies' {
	local derived : word `i' of `targets'
	local source  : word `i' of `sources'
	local caption : word `i' of `captions'
	local storage
	if "`derived'" == "psu" local storage "double"
	generate `storage' `derived' = `source'
	label variable `derived' "`caption'"
	notes `derived': Original: `source' | `tag'
}


* interview_comp: 1 when dispcode is 1100, 0 when dispcode is 1200
*   (labelled CompletedInterview / PartialInterview below);
*   every other dispcode value, including missing, is left missing.
local derived "interview_comp"
local source  "dispcode"
generate `derived' = (`source' == 1100) if inlist(`source', 1100, 1200)
label variable `derived' "Interview Complete"
label define `derived' 0 "PartialInterview" 1 "CompletedInterview"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'


* metrogroup: metropolitan status recoded from mscode.
*   mscode 1, 2, 3, 5 -> 0, 1, 2, 3; system-missing mscode -> 4, which the
*   value label below names "GU/PR/VI/Cell". Any other mscode value stays missing.
local derived "metrogroup"
local source  "mscode"
generate `derived' = .
local target 0
foreach code in 1 2 3 5 . {
	* the i-th source code in the list maps to target value i-1
	replace `derived' = `target' if `source' == `code'
	local ++target
}
label variable `derived' "Metropolitan Status Code"
label define `derived' ///
	0 "CenterCityinMSA" ///
	1 "OutsideCityInCounty" ///
	2 "SuburbanCounty" ///
	3 "OutsideMSA" ///
	4 "GU/PR/VI/Cell"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'



* censusdivision: groups the state FIPS code held in -state- into divisions
*   0-8, plus separate codes for FIPS 66 (9, "Guam") and FIPS 72
*   (10, "PuertoRico"). FIPS codes not listed below are left missing.
local derived "censusdivision"
local source  "state"
generate `derived' = 0 if inlist(`source', 9, 23, 25, 33, 44, 50)
replace  `derived' = 1 if inlist(`source', 34, 36, 42)
replace  `derived' = 2 if inlist(`source', 17, 18, 26, 39, 55)
replace  `derived' = 3 if inlist(`source', 19, 20, 27, 29, 31, 38, 46)
replace  `derived' = 4 if inlist(`source', 12, 13, 37, 45, 51, 11, 24, 10, 54)
replace  `derived' = 5 if inlist(`source', 1, 21, 28, 47)
replace  `derived' = 6 if inlist(`source', 5, 22, 40, 48)
replace  `derived' = 7 if inlist(`source', 4, 8, 16, 30, 32, 35, 49, 56)
replace  `derived' = 8 if inlist(`source', 2, 6, 15, 41, 53)
replace  `derived' = 9 if `source' == 66
replace  `derived' = 10 if `source' == 72
label variable `derived' "Census Division"
label define `derived' ///
	0 "NewEngland" ///
	1 "MidAtlantic" ///
	2 "ENCentral" ///
	3 "WNCentral" ///
	4 "SAtlantic" ///
	5 "ESCentral" ///
	6 "WSCentral" ///
	7 "Mountain" ///
	8 "Pacific" ///
	9 "Guam" ///
	10 "PuertoRico"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'


* censusregion: rolls censusdivision up into regions.
*   divisions 0-1 -> 0, 2-3 -> 1, 4-6 -> 2, 7-8 -> 3;
*   division 9 -> 4 and 10 -> 5 keep Guam and Puerto Rico as their own codes.
*   Missing divisions stay missing.
local derived "censusregion"
local source  "censusdivision"
generate `derived' = 0 if inrange(`source', 0, 1)
replace  `derived' = 1 if inrange(`source', 2, 3)
replace  `derived' = 2 if inrange(`source', 4, 6)
replace  `derived' = 3 if inrange(`source', 7, 8)
replace  `derived' = `source' - 5 if inlist(`source', 9, 10)
label variable `derived' "Census Region"
label define `derived' ///
	0 "Northeast" ///
	1 "MidWest" ///
	2 "South" ///
	3 "West" ///
	4 "Guam" ///
	5 "PuertoRico"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'


* censusregion_sim: coarser regional grouping built from censusregion, with
*   state FIPS 15 (Hawaii) split out of the West.
*   regions 0-2 -> 0 ("Eastern"); region 3 other than FIPS 15 -> 1 ("West");
*   FIPS 15 -> 2; region 4 -> 3 ("Guam"); region 5 -> 4 ("PuertoRico").
* The statements run in this order on purpose: the FIPS 15 override is applied
* after the West assignment, and the Guam/Puerto Rico codes are applied last.
* Reads -state- directly in addition to the source variable named in the note.
local derived "censusregion_sim"
local source  "censusregion"
generate `derived' = 0 if inrange(`source', 0, 2)
replace  `derived' = 1 if `source' == 3 & state != 15
replace  `derived' = 2 if state == 15 //Put Hawaii into a separate category
replace  `derived' = 3 if `source' == 4
replace  `derived' = 4 if `source' == 5
label variable `derived' "Census Region"
label define `derived' ///
	0 "Eastern" ///
	1 "West" ///
	2 "Hawaii" ///
	3 "Guam" ///
	4 "PuertoRico"
numlabel `derived', add mask(#) // prefix each value label with its numeric code
label values `derived' `derived'
notes `derived': Original: `source' | `tag'





*For gsem estimation
* Step 1: split each outcome by sex. <outcome>_m holds the outcome where
*   male==1 and <outcome>_f where male==0; all other rows are missing.
foreach stem in exe_intensity_sim exe_intensity_simplest exe_aerobic_advice exe_strength_advice exe_active {
	generate `stem'_m = `stem' if male == 1
	generate `stem'_f = `stem' if male == 0
}

* Step 2: list the sex-specific outcomes in the order their copies are created.
local dvlist
foreach stem in exe_intensity_sim exe_aerobic_advice exe_strength_advice exe_active exe_intensity_simplest {
	local dvlist `dvlist' `stem'_m `stem'_f
}

* Step 3: for every listed outcome create 20 identical copies, named
*   <outcome>1 ... <outcome>20.
foreach dv of local dvlist {
	forvalues copy = 1/20 {
		generate `dv'`copy' = `dv'
	}
}

/*
*Generate census region-specific DV
foreach var in `dvlist' {
	local `var'_
	forvalues dvnum=1(1)20 {
		gen `var'`dvnum'=`var'
	}
}
*/

*Generate outcomes by census region
* For every sex-specific outcome <stem>_<sex> and every censusregion_sim code
* r in 0-4, create <stem>_<sex>_cr<r>: equal to the outcome inside region r and
* missing everywhere else. Loop nesting (outcome, then sex, then region)
* determines the order in which the 50 variables are created.
foreach stem in exe_intensity_sim exe_intensity_simplest exe_aerobic_advice exe_strength_advice exe_active {
	foreach sex in m f {
		forvalues region = 0/4 {
			generate `stem'_`sex'_cr`region' = `stem'_`sex' if censusregion_sim == `region'
		}
	}
}



* Declare the complex survey design: svyweight as the probability weight,
* strata and psu as the design variables, and singleton strata handled with
* the "centered" method.
svyset [pweight=svyweight], strata(strata) psu(psu) singleunit(centered)



* Rows with missing male are removed from the file entirely.
drop if mi(male) //don't want to impute missing values of gender

* Sequential respondent ID, assigned after the drop above so it has no gaps.
* Typed long explicitly: -generate- would otherwise create a float, which
* cannot represent every integer above 16,777,216 exactly, so IDs could stop
* being unique if the file ever grew past that size.
gen long id=_n //generate unique respondent's ID


* Demote every variable to the smallest storage type that holds its values.
compress


* Full analysis file; the extracts written later in this do-file start from it.
save "Data Ready for Analysis/brfss2015-readyforanalysis.dta",replace



*Output data for multiple imputation
* Works on the data still in memory (the analysis file saved just before):
* rows with state FIPS 72 are removed, the variable list is cut down to the
* columns named below, and the result is written to its own .dta file.
drop if state == 72 //delete Puerto Rico

keep ///
	id svyweight strata psu /// technical variables
	exercise /// physical activities
	exercise1type exercise1freq exercise1min ///
	exercise2type exercise2freq exercise2min ///
	muscleexercise exe1_intensity exe2_intensity ///
	exe_intensity_simplest exe_aerobic_advice exe_strength_advice exe_active ///
	state yearsold male educ hhincome praceethnicgp_multi_sim ///
	srh badphyhlth badmenhlth bmicat employstatus_full //auxiliary variables

save "Data Ready for Analysis/brfss2015-readyforimpute.dta", replace



*Output data for listwise analysis
* Reloads the full analysis file (the imputation extract above dropped rows
* and columns from memory), keeps the design variables, the aerobic/strength
* advice outcomes with all their suffixed variants, and the covariates, then
* writes the result to its own .dta file.
use "Data Ready for Analysis/brfss2015-readyforanalysis.dta", clear

keep ///
	svyweight strata psu /// technical variables
	exe_aerobic_advice_m* exe_strength_advice_m* ///
	exe_aerobic_advice_f* exe_strength_advice_f* ///
	state yearsold year praceethnicgp_multi_sim male educ hhincome ///
	exe_aerobic_advice_m_cr* exe_strength_advice_m_cr* ///
	exe_aerobic_advice_f_cr* exe_strength_advice_f_cr*

save "Data Ready for Analysis/brfss2015-readyforlistwiseanalysis.dta", replace



* Disabled alternatives kept for reference: saving the data under the do-file
* name and writing stored estimates to disk.
*save `pgm'.dta,replace

*estwrite * using `pgm',replace


* Close the log opened at the top of this do-file, then stop execution;
* anything placed after -exit- is not run.
log close
exit

